knitr::opts_chunk$set(echo = TRUE, message = FALSE)
library(Seurat)
library(ggplot2)
library(data.table)
library(MAST)
library(SingleR)
library(dplyr)
library(tidyr)
library(limma)
library(ggrepel)## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggrepel_0.8.2 limma_3.44.3
## [3] tidyr_1.1.1 dplyr_1.0.2
## [5] SingleR_1.2.4 MAST_1.14.0
## [7] SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.2
## [9] DelayedArray_0.14.1 matrixStats_0.56.0
## [11] Biobase_2.48.0 GenomicRanges_1.40.0
## [13] GenomeInfoDb_1.24.2 IRanges_2.22.2
## [15] S4Vectors_0.26.1 BiocGenerics_0.34.0
## [17] data.table_1.13.0 ggplot2_3.3.2
## [19] Seurat_3.2.0
##
## loaded via a namespace (and not attached):
## [1] AnnotationHub_2.20.1 BiocFileCache_1.12.1
## [3] plyr_1.8.6 igraph_1.2.5
## [5] lazyeval_0.2.2 splines_4.0.2
## [7] BiocParallel_1.22.0 listenv_0.8.0
## [9] digest_0.6.25 htmltools_0.5.0
## [11] magrittr_1.5 memoise_1.1.0
## [13] tensor_1.5 cluster_2.1.0
## [15] ROCR_1.0-11 globals_0.12.5
## [17] colorspace_1.4-1 blob_1.2.1
## [19] rappdirs_0.3.1 xfun_0.16
## [21] crayon_1.3.4 RCurl_1.98-1.2
## [23] jsonlite_1.7.0 spatstat_1.64-1
## [25] spatstat.data_1.4-3 survival_3.2-3
## [27] zoo_1.8-8 ape_5.4-1
## [29] glue_1.4.1 polyclip_1.10-0
## [31] gtable_0.3.0 zlibbioc_1.34.0
## [33] XVector_0.28.0 leiden_0.3.3
## [35] BiocSingular_1.4.0 future.apply_1.6.0
## [37] abind_1.4-5 scales_1.1.1
## [39] DBI_1.1.0 miniUI_0.1.1.1
## [41] Rcpp_1.0.5 viridisLite_0.3.0
## [43] xtable_1.8-4 reticulate_1.16
## [45] bit_4.0.4 rsvd_1.0.3
## [47] htmlwidgets_1.5.1 httr_1.4.2
## [49] RColorBrewer_1.1-2 ellipsis_0.3.1
## [51] ica_1.0-2 pkgconfig_2.0.3
## [53] uwot_0.1.8 dbplyr_1.4.4
## [55] deldir_0.1-28 tidyselect_1.1.0
## [57] rlang_0.4.7 reshape2_1.4.4
## [59] later_1.1.0.1 AnnotationDbi_1.50.3
## [61] munsell_0.5.0 BiocVersion_3.11.1
## [63] tools_4.0.2 generics_0.0.2
## [65] RSQLite_2.2.0 ExperimentHub_1.14.1
## [67] ggridges_0.5.2 evaluate_0.14
## [69] stringr_1.4.0 fastmap_1.0.1
## [71] yaml_2.2.1 goftest_1.2-2
## [73] knitr_1.29 bit64_4.0.2
## [75] fitdistrplus_1.1-1 purrr_0.3.4
## [77] RANN_2.6.1 pbapply_1.4-3
## [79] future_1.18.0 nlme_3.1-148
## [81] mime_0.9 compiler_4.0.2
## [83] plotly_4.9.2.1 curl_4.3
## [85] png_0.1-7 interactiveDisplayBase_1.26.3
## [87] spatstat.utils_1.17-0 tibble_3.0.3
## [89] stringi_1.4.6 lattice_0.20-41
## [91] Matrix_1.2-18 vctrs_0.3.2
## [93] pillar_1.4.6 lifecycle_0.2.0
## [95] BiocManager_1.30.10 lmtest_0.9-37
## [97] RcppAnnoy_0.0.16 BiocNeighbors_1.6.0
## [99] cowplot_1.0.0 bitops_1.0-6
## [101] irlba_2.3.3 httpuv_1.5.4
## [103] patchwork_1.0.1 R6_2.4.1
## [105] promises_1.1.1 KernSmooth_2.23-17
## [107] gridExtra_2.3 codetools_0.2-16
## [109] MASS_7.3-52 assertthat_0.2.1
## [111] withr_2.2.0 sctransform_0.2.1
## [113] GenomeInfoDbData_1.2.3 mgcv_1.8-31
## [115] grid_4.0.2 rpart_4.1-15
## [117] rmarkdown_2.3 DelayedMatrixStats_1.10.1
## [119] Rtsne_0.15 shiny_1.5.0
#wbm <- readRDS('./data/v2/combined.integrated.rds')
wbm <- readRDS('./data/V2/lesser.combined.integrated.rds')In v2 of the analysis we decided to include the control mice from the Nbeal experiment with the Migr1 and Mpl mice. The thought is that it may be good to have another control, since the Migr1 control has irradiated and had a bone marrow transplantation. I’m going to split the Rmarkdown files into separate part, to better organize my analysis.
Here I’m gather the cluster centroids for the different 15 different clusters before and after cca correction.
Getting cluster centroids before cca (‘RNA’ assay) and after cca (‘integrated’ assay)
# Getting cluster centroids for all assays
av.expression <- AverageExpression(wbm)
# Subsetting the RNA assay data frame to only genes in integrated assay
av.expression$RNA <- av.expression$RNA[rownames(av.expression$RNA) %in% rownames(av.expression$integrated),]
# Adding distinguishing names to merge into one data frame
colnames(av.expression$integrated) <- paste0("Integrated-",0:14)
colnames(av.expression$RNA) <- paste0("RNA-", 0:14)
# Combining into one data frame
av.exp <- cbind(av.expression$RNA, av.expression$integrated)
# Writing it out to a table for Jun to analyze
# write.table(av.exp, './data/v2/cluster.centroids.before.after.cca.txt', sep = '\t', quote = F)Wanted to see how the centroids distinguished themselves
## [1] 87.7330768 43.4970733 42.4476551 40.7532825 8.3004814 8.1825628
## [7] 6.7821755 6.5915510 5.5046258 5.3172070 4.0254913 3.9634890
## [13] 2.8131370 2.6846944 2.2243166 2.1660917 2.1487000 2.0454804
## [19] 1.9819829 1.9172511 1.6886617 1.6613350 1.5123164 1.4562874
## [25] 1.3314026 1.2987749 0.3695226 0.3533690 0.2377896 0.2339423
pca.av.exp <- as.data.frame(pca.av.exp$rotation)
pca.av.exp$assay <- tstrsplit(rownames(pca.av.exp),'-')[[1]]
pca.av.exp$cluster <- tstrsplit(rownames(pca.av.exp),'-')[[2]]
ggplot(pca.av.exp, aes(x = PC1, y = PC2, color = assay)) +
geom_point() + theme_bw()ggplot(pca.av.exp, aes(x = PC1, y = PC2, color = cluster,label = cluster)) +
geom_point() + theme_bw() + geom_text_repel()# DimPlot(wbm, reduction = 'umap', label = T, repel = T)
#
ggplot(pca.av.exp, aes(x = PC3, y = PC4, label = cluster, color = assay)) +
geom_point () +
geom_text_repel() +
theme_bw()# ggplot(pca.av.exp, aes(x = PC4, y = PC5, label = cluster, color = assay)) +
# geom_point () +
# geom_text_repel() +
# theme_bw()
#
# ggplot(pca.av.exp, aes(x = PC6, y = PC7, label = cluster, color = assay)) +
# geom_point () +
# geom_text_repel() +
# theme_bw()It seems that each PC is distinguishing a particular cluster. This does make sense because that’s in general how the clusters were created.
Doing the same thing as before but further dividing the centroids for each condition (enriched or normal; Migr1, Mpl, Nbeal)
# Getting cluster centroids for each experiments for all assay
av.expression.by.condition <- AverageExpression(wbm, add.ident = 'Condition')
# Subsetting the RNA assay data frame to only genes in integrated assay
av.expression.by.condition$RNA <-
av.expression.by.condition$RNA[rownames(av.expression.by.condition$RNA) %in%
rownames(av.expression.by.condition$integrated),]
# Getting unique colnames
colnames(av.expression.by.condition$integrated) <- paste0("Integrated-",colnames(av.expression.by.condition$integrated))
colnames(av.expression.by.condition$RNA) <- paste0("RNA-", colnames(av.expression.by.condition$RNA))
# Combining into one data frame
av.exp.by.cond <- cbind(av.expression.by.condition$RNA, av.expression.by.condition$integrated)
# Writing it out to a table for Jun to use
# write.table(av.exp.by.cond, './data/v2/cluster.centroids.before.after.cca.by.condition.txt',
# sep = '\t',
# quote = F)## [1] 194.342051 147.302812 113.007868 108.799772 47.365515 45.667525
## [7] 37.517217 34.021108 20.035359 19.962408 19.017627 18.415837
## [13] 14.399119 13.760190 13.601906 12.290127 9.821881 9.345166
## [19] 7.717798 7.508813 7.260316 6.360444 6.298828 5.864222
## [25] 5.698218 5.423408 5.351688 5.040190 4.761992 4.631636
pca.av.exp <- as.data.frame(pca.av.exp$rotation)
pca.av.exp$assay <- tstrsplit(rownames(pca.av.exp),'-')[[1]]
pca.av.exp$cluster <- tstrsplit(tstrsplit(rownames(pca.av.exp),'-')[[2]],'_')[[1]]
pca.av.exp$condition <- tstrsplit(tstrsplit(rownames(pca.av.exp),'-')[[2]],'_')[[2]]
ggplot(pca.av.exp, aes(x = PC1, y = PC2, color = cluster, shape = condition)) +
geom_point() + theme_bw()So doing each of these separately (Mpl/Migr1 and Nbeal) and getting clusters. For each cluster getting the cell identity and cluster centroids. Then compare these to the final clusters to see how centroids compare. Do some original clusters get split into different centroids, or just combine across experiments easily.
Also going to get cell count cross tabulation from clusters before and after integration
For generating the clusters, I’m not sure if it’s better to have more or less clusters in the end. Going to generate two sets of clusters to then compare downstream.
# Most of this is being copied from v2.1.Integration.UMAP.Rmd
# Except here I am also generating clusters integration
wbm.data <- Read10X(data.dir = './data/filtered_feature_bc_matrix//')
# Creating Seurat Object
wbm <- CreateSeuratObject(counts = wbm.data, project = 'Mpl', min.cells = 3, min.features = 200)
# Reading in the HTO labels provided by Dr. Brian Parkin
htos <- read.csv('./data/HTOs.csv')
#dim(htos)
# Removing all the cells without a label
htos <- htos[htos$HTOs != '',]
#dim(htos)
# Adding the conditioni from the HTO tagged, from an e-mail from Priya
htos$condition <- ifelse(htos$HTOs == 'HTO-1', 'Mpl',
ifelse(htos$HTOs == 'HTO-2', 'enrMpl',
ifelse(htos$HTOs == 'HTO-3', 'Migr1','enrMigr1')))
# Subsetting the two data frames so they only include cells that overlap
wbm <- wbm[,colnames(wbm) %in% htos$Barcode]
htos <- htos[htos$Barcode %in% colnames(wbm),]
# Making sure the cell order is maintained between the two dataframes, so I can
# just add the condition to the meta data
#summary(rownames(wbm@meta.data) == htos$Barcode)
# Adding the condition to the meta data
wbm@meta.data$Condition <- htos$condition
wbm[['percent.mt']] <- PercentageFeatureSet(wbm, pattern = '^mt')
less.subset.wbm <- subset(wbm, subset = nFeature_RNA > 500 & percent.mt < 10)cnt.data <- Read10X(data.dir = './data/Experiment2/filtered_feature_bc_matrix/')
cnt <- CreateSeuratObject(counts = cnt.data, project = 'Nbeal', min.cells = 3, min.features = 200)
# Getting the HTOs
nbeal_hto <- read.table('./data/Experiment2/hto_labels.txt')
nbeal_hto <- nbeal_hto[nbeal_hto$V2 %in% c('HTO3','HTO4'),]
nbeal_hto$condition <- ifelse(nbeal_hto$V2 == 'HTO3', 'Nbeal_cntrl', 'enrNbeal_cntrl')
nbeal_hto$cell <- paste0(nbeal_hto$V1, '-1')
# summary(nbeal_hto$cell %in% colnames(cnt))
cnt <- cnt[,colnames(cnt) %in% nbeal_hto$cell]
# Making sure the cell order is maintained between the two dataframes, so I can
# just add the condition to the meta data
#summary(rownames(wbm@meta.data) == htos$Barcode)
# Adding the condition to the meta data
cnt@meta.data$Condition <- nbeal_hto$condition
cnt[['percent.mt']] <- PercentageFeatureSet(cnt, pattern = '^mt')
less.cnt <- subset(cnt, subset = nFeature_RNA > 500 & percent.mt < 10)less.subset.wbm <- NormalizeData(less.subset.wbm, verbose = F)
less.subset.wbm <- FindVariableFeatures(less.subset.wbm,
selection.method = 'vst',
nfeatures = 2000,
verbose = F)
less.subset.wbm <- ScaleData(less.subset.wbm, verbose = F)
less.subset.wbm <- RunPCA(less.subset.wbm, features = VariableFeatures(less.subset.wbm))
ElbowPlot(less.subset.wbm)# choosing 15 PCs
less.subset.wbm <- FindNeighbors(less.subset.wbm, dims = 1:15)
res <- seq(0,1, by = 0.05)
clstrs <- c()
for (i in res){
x <- FindClusters(less.subset.wbm, resolution = i, verbose = F)
clstrs <- c(clstrs, length(unique(x$seurat_clusters)))
}
plot(res,clstrs)# Going with .2 and .7
less.subset.wbm <- FindClusters(less.subset.wbm, resolution = .2, verbose = F)
less.subset.wbm <- FindClusters(less.subset.wbm, resolution = .7, verbose = F)
less.subset.wbm <- RunUMAP(less.subset.wbm, dims = 1:15)## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
DimPlot(less.subset.wbm, reduction = 'umap', label = T, repel = T) + NoLegend() + ggtitle ('Resolution 0.7')DimPlot(less.subset.wbm, reduction = 'umap', label = T, repel = T, group.by = 'RNA_snn_res.0.2') +
NoLegend() + ggtitle ('Resolution 0.2')less.cnt <- NormalizeData(less.cnt, verbose = F)
less.cnt <- FindVariableFeatures(less.cnt,
selection.method = 'vst',
nfeatures = 2000,
verbose = F)
less.cnt <- ScaleData(less.cnt, verbose = F)
less.cnt <- RunPCA(less.cnt, features = VariableFeatures(less.cnt))
ElbowPlot(less.cnt)# choosing 15 PCs
less.cnt <- FindNeighbors(less.cnt, dims = 1:15)
res <- seq(0,1, by = 0.05)
clstrs <- c()
for (i in res){
x <- FindClusters(less.cnt, resolution = i, verbose = F)
clstrs <- c(clstrs, length(unique(x$seurat_clusters)))
}
plot(res,clstrs)# Going with .2 and .7
less.cnt <- FindClusters(less.cnt, resolution = .2, verbose = F)
less.cnt <- FindClusters(less.cnt, resolution = .7, verbose = F)
less.cnt <- RunUMAP(less.cnt, dims = 1:15)
DimPlot(less.cnt, reduction = 'umap', label = T, repel = T) + NoLegend() + ggtitle ('Resolution 0.7')DimPlot(less.cnt, reduction = 'umap', label = T, repel = T, group.by = 'RNA_snn_res.0.2') +
NoLegend() + ggtitle ('Resolution 0.2')wbm <- readRDS('./data/v2/lesser.combined.integrated.rds')
wbm$State <- wbm$Condition
wbm$Condition <- ifelse(grepl('enr', wbm$Condition), 'Enriched', 'Not enriched')
wbm$Experiment <- ifelse(grepl('Mpl', wbm$State), 'Mpl',
ifelse(grepl('Migr', wbm$State), 'Migr1', 'Control'))
sumry <- read.table('./data/v2/summary_naming.tsv', header = T, sep = '\t')
# sumry
# new_levels <- sumry$final
new_levels <- c('Gran-1','Gran-2','SC','B cell-1','Gran-3','Monocyte','MEP/Mast',
'?Prog','Macrophage','B cell-2','Erythrocyte', 'T cell',
'Megakaryocyte','B cell-3', 'B cell-4')
names(new_levels) <- levels(wbm)
DimPlot(wbm, reduction = 'umap', label = T, repel = T) + NoLegend()#new_levels
wbm <- RenameIdents(wbm, new_levels)
wbm$new_cluster_IDs <- Idents(wbm)
DimPlot(wbm, reduction = 'umap', label = T, repel = T) + NoLegend() +
ggtitle ('Integrated Dataset')wbm.less.clusters <- less.subset.wbm@meta.data[,c('Condition','RNA_snn_res.0.2','RNA_snn_res.0.7')]
less.cnt.clusters <- less.cnt@meta.data[,c('Condition','RNA_snn_res.0.2','RNA_snn_res.0.7')]
wbm.clusters <- wbm@meta.data[,c('State','seurat_clusters')]
rownames(wbm.less.clusters) <- paste0('Mpl_', rownames(wbm.less.clusters))
rownames(less.cnt.clusters) <- paste0('Nbeal_', rownames(less.cnt.clusters))
mpl.clusters <- cbind(wbm.clusters[grepl('Mpl', rownames(wbm.clusters)),], wbm.less.clusters)
nbeal.clusters <- cbind(wbm.clusters[grepl('Nbeal', rownames(wbm.clusters)),], less.cnt.clusters)mpl.tbl <- as.data.frame(table( mpl.clusters$RNA_snn_res.0.2, mpl.clusters$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Migr1/Mpl Experiment (res = 0.2)')mpl.tbl <- as.data.frame(table( mpl.clusters$RNA_snn_res.0.7, mpl.clusters$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters$RNA_snn_res.0.7 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Migr1/Mpl Experiment (res = 0.7)')Most of the original clusters that get up into multiple clusters in the integrated umap occur within cell type groups. For example there are original clusters that are found in both integrated cluster 3 & 13 (both B-cell clusters), and same with clusters 0, 1 & 4 (Granulocytes). Of interest is to note some original clusters being split into integrated clusters 10 & 12, erythrocytes and MKs respectivaly. These are different cell types but with a close relative progenitor MEPs, so perhaps these are some MEPs being split up with the addition of the integrate Nbeal data.
Splitting up whether we are looking at enriched/non-enrich and between Mpl and Migr1
Going to stick with the resolution of 0.3, not overload figures
mpl.clusters2 <- mpl.clusters[grepl('Migr1',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters[grepl('Migr1',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('All Migr1 Experiment (res = 0.2)')mpl.clusters2 <- mpl.clusters[grepl('enrMigr1',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters[grepl('enrMigr1',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Enr Migr1 Experiment (res = 0.2)')mpl.clusters2 <- mpl.clusters[mpl.clusters$State == 'Migr1',]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters[mpl.clusters$State == 'Migr1',]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Non-enriched Migr1 Experiment (res = 0.2)')mpl.clusters2 <- mpl.clusters[grepl('Mpl',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters[grepl('Mpl',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('All Mpl Experiment (res = 0.2)')mpl.clusters2 <- mpl.clusters[grepl('enrMpl',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters[grepl('enrMpl',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Enr Mpl Experiment (res = 0.2)')mpl.clusters2 <- mpl.clusters[mpl.clusters$State == 'Mpl',]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))
colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')
mpl.tbl$Percentage <- NA
for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
sum(wbm.less.clusters[mpl.clusters$State == 'Mpl',]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Non-enriched Mpl Experiment (res = 0.2)')nbeal.tbl <- as.data.frame(table( nbeal.clusters$RNA_snn_res.0.2, nbeal.clusters$seurat_clusters))
colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')
nbeal.tbl$Percentage <- NA
for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
sum(less.cnt.clusters$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Nbeal Experiment (res = 0.2)')nbeal.tbl <- as.data.frame(table( nbeal.clusters$RNA_snn_res.0.7, nbeal.clusters$seurat_clusters))
colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')
nbeal.tbl$Percentage <- NA
for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
sum(less.cnt.clusters$RNA_snn_res.0.7 == i),2)*100
}
ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('Nbeal Experiment (res = 0.7)')nbeal.clusters2 <- nbeal.clusters[nbeal.clusters$State == 'Nbeal_cntrl',]
nbeal.tbl <- as.data.frame(table( nbeal.clusters2$RNA_snn_res.0.2, nbeal.clusters2$seurat_clusters))
colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')
nbeal.tbl$Percentage <- NA
for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
sum(less.cnt.clusters[less.cnt.clusters$Condition == 'Nbeal_cntrl',]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('non-enrNbeal Experiment (res = 0.2)')nbeal.clusters2 <- nbeal.clusters[nbeal.clusters$State == 'enrNbeal_cntrl',]
nbeal.tbl <- as.data.frame(table( nbeal.clusters2$RNA_snn_res.0.2, nbeal.clusters2$seurat_clusters))
colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')
nbeal.tbl$Percentage <- NA
for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
sum(less.cnt.clusters[less.cnt.clusters$Condition == 'enrNbeal_cntrl',]$RNA_snn_res.0.2 == i),2)*100
}
ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
geom_tile(aes(fill = Percentage)) +
scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
geom_text(aes(label = Count)) +
ggtitle('enrNbeal Experiment (res = 0.2)')